package com.sn.test;

import com.sn.utils.PoolingHttpClient;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demo that downloads a news list page and prints, for every link that looks
 * like a news entry, its title, its raw {@code href} and the absolute URL it
 * points to.
 */
public class NY_demo {

    /** URL of the news list page that is fetched and parsed. */
    private static final String LIST_URI = "http://www.njupt.edu.cn/53/list.htm";

    /** Site root, used as a fallback prefix when an href cannot be resolved. */
    private static final String BASE_URI = "http://www.njupt.edu.cn";

    /**
     * Fetches the list page, prints the column name and the number of
     * {@code target="_blank"} anchors, then prints one line per news link.
     *
     * @param args unused command-line arguments
     */
    public static void main(String[] args) {
        // Obtain a client from the project's pooling helper and request the page.
        PoolingHttpClient httpClient = new PoolingHttpClient();
        String content = httpClient.doGetHtml(LIST_URI);

        // NOTE(review): what doGetHtml returns on failure is not visible here;
        // guard against null/blank so the parser is never handed an unusable page.
        if (StringUtils.isBlank(content)) {
            System.err.println("No content fetched from " + LIST_URI);
            return;
        }

        // Parse with the page URL as base so relative links can be resolved.
        Document doc = Jsoup.parse(content, LIST_URI);

        // First-pass filter: anchors that open in a new window (target=_blank).
        Elements anchors = doc.getElementsByAttributeValue("target", "_blank");
        String category = doc.getElementsByClass("Column_Name").text();
        System.out.println("====================新闻类别" + category + "=====================");
        System.out.println(anchors.size());

        for (Element anchor : anchors) {
            String text = anchor.text();
            String href = anchor.attr("href");
            if (!isNewsLink(text, href)) {
                continue;
            }
            // Second-pass filter passed: the real article address is the
            // site-local href resolved against the school's site.
            System.out.println("标题: " + text + "链接:  " + href + "转到:" + resolveTarget(anchor, href));
        }
    }

    /**
     * Decides whether an anchor is a site-local news link: it must have
     * non-blank text and href, and the href must not be an absolute
     * {@code http} link, a {@code /_redirect?} link or a {@code mailto} link.
     */
    private static boolean isNewsLink(String text, String href) {
        if (StringUtils.isBlank(text) || StringUtils.isBlank(href)) {
            return false;
        }
        return !href.startsWith("http")
                && !href.startsWith("/_redirect?")
                && !href.startsWith("mailto");
    }

    /**
     * Returns the absolute URL for the anchor's href. Falls back to plain
     * concatenation with the site root when jsoup cannot resolve the href
     * (absUrl yields an empty string in that case).
     */
    private static String resolveTarget(Element anchor, String href) {
        String absolute = anchor.absUrl("href");
        return absolute.isEmpty() ? BASE_URI + href : absolute;
    }
}
